dyn.load("/home/moyra.lawrence/miniconda3/envs/monocle3/lib/libgeos_c.so.1")
library(Seurat)
## Warning in fun(libname, pkgname): rgeos: versions of GEOS runtime 3.11.0-CAPI-1.17.0
## and GEOS at installation 3.8.0-CAPI-1.13.1differ
library(patchwork)
library(ggplot2)
library(gridExtra)
library(openxlsx)
library(svglite)
library(dplyr)
library(gt)
library(tidyverse)
library(stringi)
library(SeuratObject)
raw_data <- data.table::fread("/home/moyra.lawrence/R_analysis/Second_10x_run/GSE45719_matrix.txt.gz", data.table = FALSE)
names(raw_data)[1] <- 'Gene'
names <- make.unique(raw_data$Gene)
rownames(raw_data) <- names
raw_data <- raw_data[,-1] # get rid of old names
colnames(raw_data) <- gsub("_", "-", colnames(raw_data))
colnames(raw_data) <- stri_replace_all_fixed(str = colnames(raw_data), pattern = "_", replacement = "-")
colnames(raw_data) <- stri_replace_all_fixed(str = colnames(raw_data), pattern = "zy1", replacement = "zygote-1")
colnames(raw_data) <- stri_replace_all_fixed(str = colnames(raw_data), pattern = "zy2", replacement = "zygote-2")
colnames(raw_data) <- stri_replace_all_fixed(str = colnames(raw_data), pattern = "zy3", replacement = "zygote-3")
colnames(raw_data) <- stri_replace_all_fixed(str = colnames(raw_data), pattern = "zy4", replacement = "zygote-4")
ML11 <- CreateSeuratObject(counts=raw_data, project="ML11", names.delim="-")
## Warning: Feature names cannot have pipe characters ('|'), replacing with dashes
## ('-')
ML11$orig.ident
## 16cell-1-10 16cell-1-11 16cell-1-12
## 16cell 16cell 16cell
## 16cell-1-13 16cell-1-14 16cell-1-15
## 16cell 16cell 16cell
## 16cell-1-2 16cell-1-3 16cell-1-4
## 16cell 16cell 16cell
## 16cell-1-5 16cell-1-6 16cell-1-7
## 16cell 16cell 16cell
## 16cell-1-8 16cell-1-9 16cell-4-1
## 16cell 16cell 16cell
## 16cell-4-10 16cell-4-11 16cell-4-2
## 16cell 16cell 16cell
## 16cell-4-3 16cell-4-4 16cell-4-5
## 16cell 16cell 16cell
## 16cell-4-6 16cell-4-7 16cell-4-8
## 16cell 16cell 16cell
## 16cell-4-9 16cell-5-1 16cell-5-10
## 16cell 16cell 16cell
## 16cell-5-11 16cell-5-12 16cell-5-13
## 16cell 16cell 16cell
## 16cell-5-2 16cell-5-3 16cell-5-4
## 16cell 16cell 16cell
## 16cell-5-5 16cell-5-6 16cell-5-7
## 16cell 16cell 16cell
## 16cell-5-8 16cell-5-9 16cell-6-1
## 16cell 16cell 16cell
## 16cell-6-10 16cell-6-11 16cell-6-12
## 16cell 16cell 16cell
## 16cell-6-2 16cell-6-3 16cell-6-4
## 16cell 16cell 16cell
## 16cell-6-5 16cell-6-6 16cell-6-7
## 16cell 16cell 16cell
## 16cell-6-8 16cell-6-9 4cell-1-1
## 16cell 16cell 4cell
## 4cell-1-2 4cell-1-4 4cell-2-1
## 4cell 4cell 4cell
## 4cell-2-2 4cell-2-3 4cell-2-4
## 4cell 4cell 4cell
## 4cell-3-1 4cell-3-3 4cell-3-4
## 4cell 4cell 4cell
## 4cell-4-1 4cell-4-2 4cell-4-3
## 4cell 4cell 4cell
## 4cell-4-4 8cell-1-1 8cell-1-2
## 4cell 8cell 8cell
## 8cell-1-4 8cell-1-5 8cell-1-6
## 8cell 8cell 8cell
## 8cell-1-7 8cell-1-8 8cell-2-1
## 8cell 8cell 8cell
## 8cell-2-2 8cell-2-3 8cell-2-4
## 8cell 8cell 8cell
## 8cell-2-6 8cell-2-7 8cell-2-8
## 8cell 8cell 8cell
## 8cell-5-1 8cell-5-2 8cell-5-3
## 8cell 8cell 8cell
## 8cell-5-4 8cell-5-6 8cell-5-7
## 8cell 8cell 8cell
## 8cell-5-8 8cell-8-1 8cell-8-2
## 8cell 8cell 8cell
## 8cell-8-3 8cell-8-4 8cell-8-6
## 8cell 8cell 8cell
## 8cell-8-7 8cell-8-8 BXC-100pg-liver-RNA-1
## 8cell 8cell BXC
## BXC-100pg-liver-RNA-2 BXC-10pg-liver-RNA-1 BXC-10pg-liver-RNA-2
## BXC BXC BXC
## BXC-1ng-liver-RNA-0r BXC-1ng-liver-RNA-1 BXC-30pg-liver-RNA-0r
## BXC BXC BXC
## BXC-30pg-liver-RNA-2 BXC-liver-cell-1 BXC-liver-cell-2
## BXC BXC BXC
## BXC-liver-cell-4 BXC-liver-cell-5 BXC-liver-cell-6
## BXC BXC BXC
## C57twocell-16-1 C57twocell-16-2 C57twocell-18-1
## C57twocell C57twocell C57twocell
## C57twocell-18-2 C57twocell-20-1 C57twocell-20-2
## C57twocell C57twocell C57twocell
## C57twocell-22-1 C57twocell-22-2 early2cell-0r-1
## C57twocell C57twocell early2cell
## early2cell-0r-2 early2cell-1-1 early2cell-1-2
## early2cell early2cell early2cell
## early2cell-2-1 early2cell-2-2 early2cell-3-1
## early2cell early2cell early2cell
## early2cell-3-2 earlyblast-2-1 earlyblast-2-10
## early2cell earlyblast earlyblast
## earlyblast-2-12 earlyblast-2-15 earlyblast-2-16
## earlyblast earlyblast earlyblast
## earlyblast-2-17 earlyblast-2-19 earlyblast-2-2
## earlyblast earlyblast earlyblast
## earlyblast-2-22 earlyblast-2-3 earlyblast-2-4
## earlyblast earlyblast earlyblast
## earlyblast-2-5 earlyblast-2-6 earlyblast-2-7
## earlyblast earlyblast earlyblast
## earlyblast-2-8 earlyblast-3-1 earlyblast-3-10
## earlyblast earlyblast earlyblast
## earlyblast-3-12 earlyblast-3-13 earlyblast-3-14
## earlyblast earlyblast earlyblast
## earlyblast-3-15 earlyblast-3-16 earlyblast-3-17
## earlyblast earlyblast earlyblast
## earlyblast-3-2 earlyblast-3-3 earlyblast-3-4
## earlyblast earlyblast earlyblast
## earlyblast-3-6 earlyblast-3-7 earlyblast-3-8
## earlyblast earlyblast earlyblast
## earlyblast-3-9 earlyblast-4-1 earlyblast-4-12
## earlyblast earlyblast earlyblast
## earlyblast-4-13 earlyblast-4-14 earlyblast-4-16
## earlyblast earlyblast earlyblast
## earlyblast-4-17 earlyblast-4-18 earlyblast-4-2
## earlyblast earlyblast earlyblast
## earlyblast-4-3 earlyblast-4-5 earlyblast-4-6
## earlyblast earlyblast earlyblast
## earlyblast-4-8 earlyblast-4-9 late2cell-5-1
## earlyblast earlyblast late2cell
## late2cell-5-2 late2cell-6-1 late2cell-6-2
## late2cell late2cell late2cell
## late2cell-7-1 late2cell-7-2 late2cell-8-1
## late2cell late2cell late2cell
## late2cell-8-2 late2cell-9-1 late2cell-9-2
## late2cell late2cell late2cell
## lateblast-1-10 lateblast-1-11 lateblast-1-13
## lateblast lateblast lateblast
## lateblast-1-14 lateblast-1-16 lateblast-1-19
## lateblast lateblast lateblast
## lateblast-1-2 lateblast-1-20 lateblast-1-21
## lateblast lateblast lateblast
## lateblast-1-23 lateblast-1-24 lateblast-1-26
## lateblast lateblast lateblast
## lateblast-1-27 lateblast-1-4 lateblast-1-5
## lateblast lateblast lateblast
## lateblast-1-6 lateblast-1-7 lateblast-1-8
## lateblast lateblast lateblast
## lateblast-1-9 lateblast-2-1 lateblast-2-12
## lateblast lateblast lateblast
## lateblast-2-14 lateblast-2-16 lateblast-2-17
## lateblast lateblast lateblast
## lateblast-2-2 lateblast-2-3 lateblast-2-5
## lateblast lateblast lateblast
## lateblast-2-7 lateblast-2-8 lateblast-2-9
## lateblast lateblast lateblast
## mid2cell-0r-1 mid2cell-0r-2 mid2cell-3-1
## mid2cell mid2cell mid2cell
## mid2cell-3-2 mid2cell-4-1 mid2cell-4-2
## mid2cell mid2cell mid2cell
## mid2cell-5-1 mid2cell-5-2 mid2cell-6-1
## mid2cell mid2cell mid2cell
## mid2cell-6-2 mid2cell-7-1 mid2cell-7-2
## mid2cell mid2cell mid2cell
## midblast-1-1 midblast-1-10 midblast-1-11
## midblast midblast midblast
## midblast-1-12 midblast-1-13 midblast-1-14
## midblast midblast midblast
## midblast-1-15 midblast-1-16 midblast-1-17
## midblast midblast midblast
## midblast-1-18 midblast-1-19 midblast-1-2
## midblast midblast midblast
## midblast-1-20 midblast-1-22 midblast-1-23
## midblast midblast midblast
## midblast-1-24 midblast-1-3 midblast-1-4
## midblast midblast midblast
## midblast-1-5 midblast-1-6 midblast-1-8
## midblast midblast midblast
## midblast-1-9 midblast-2-1 midblast-2-10
## midblast midblast midblast
## midblast-2-11 midblast-2-12 midblast-2-13
## midblast midblast midblast
## midblast-2-14 midblast-2-15 midblast-2-16
## midblast midblast midblast
## midblast-2-17 midblast-2-18 midblast-2-2
## midblast midblast midblast
## midblast-2-23 midblast-2-24 midblast-2-3
## midblast midblast midblast
## midblast-2-4 midblast-2-5 midblast-2-6
## midblast midblast midblast
## midblast-2-7 midblast-2-8 midblast-2-9
## midblast midblast midblast
## midblast-3-1 midblast-3-10 midblast-3-11
## midblast midblast midblast
## midblast-3-12 midblast-3-13 midblast-3-14
## midblast midblast midblast
## midblast-3-15 midblast-3-17 midblast-3-18
## midblast midblast midblast
## midblast-3-2 midblast-3-23 midblast-3-3
## midblast midblast midblast
## midblast-3-4 midblast-3-5 midblast-3-6
## midblast midblast midblast
## midblast-3-7 midblast-3-8 midblast-3-9
## midblast midblast midblast
## zygote-1 zygote-2 zygote-3
## zygote zygote zygote
## zygote-4 16cell-2pooled-split6a 16cell-2pooled-split6b
## zygote 16cell 16cell
## 16cell-2pooled-split8a 16cell-2pooled-split8b 16cell-split1a
## 16cell 16cell 16cell
## 16cell-split1b 16cell-split2a 16cell-split2b
## 16cell 16cell 16cell
## 8cell-12-1-smartseq2 8cell-12-2-smartseq2 8cell-12-3-smartseq2
## 8cell 8cell 8cell
## 8cell-12-4-smartseq2 8cell-13-1-smartseq2 8cell-14-1-smartseq2
## 8cell 8cell 8cell
## 8cell-14-2-smartseq2 8cell-14-3-smartseq2 8cell-14-4-smartseq2
## 8cell 8cell 8cell
## 8cell-2pooled-split5a 8cell-2pooled-split5b 8cell-2pooled-split7a
## 8cell 8cell 8cell
## 8cell-2pooled-split7b 8cell-2pooled-split9a 8cell-2pooled-split9b
## 8cell 8cell 8cell
## 8cell-split3a 8cell-split3b 8cell-split4a
## 8cell 8cell 8cell
## 8cell-split4b fibroblast-13-CxB fibroblast-14-CxB
## 8cell fibroblast fibroblast
## fibroblast-15-CxB fibroblast-16-CxB fibroblast-17-BxC
## fibroblast fibroblast fibroblast
## fibroblast-19-BxC fibroblast-20-BxC fibroblast-21-BxC
## fibroblast fibroblast fibroblast
## fibroblast-22-BxC fibroblast-9-CxB
## fibroblast fibroblast
## 13 Levels: 16cell 4cell 8cell BXC C57twocell early2cell ... zygote
ML11[["in.vivo"]] <- Idents(object = ML11)
ML11$orig.ident <- "ML11"
ML1 <- read_rds(file='/home/moyra.lawrence/count/enumerate/Empty_vector3/seurat.obj.rds')
ML2 <- read_rds(file='/home/moyra.lawrence/count/enumerate/Dppa2/seurat.obj.rds')
ML3 <- read_rds(file='/home/moyra.lawrence/count/enumerate/Gata3/seurat.obj.rds')
ML4 <- read_rds(file='/home/moyra.lawrence/count/enumerate/MERVL_VP64/seurat.obj.rds')
ML5 <- read_rds(file='/home/moyra.lawrence/count2/enumerate/Empty_puro/seurat.obj.rds')
ML6 <- read_rds(file='/home/moyra.lawrence/count2/enumerate/Empty_hygro/seurat.obj.rds')
ML7 <- read_rds(file='/home/moyra.lawrence/count2/enumerate/Dppa2_Gata3/seurat.obj.rds')
ML8 <- read_rds(file='/home/moyra.lawrence/count2/enumerate/Dppa2_MERVL/seurat.obj.rds')
ML9 <- read_rds(file='/home/moyra.lawrence/count2/enumerate/Gata3_MERVL/seurat.obj.rds')
ML10 <- read_rds(file='/home/moyra.lawrence/count2/enumerate/Dppa2_Gata3_MERVL/seurat.obj.rds')
ML12 <- read_rds(file='/home/moyra.lawrence/count3/enumerate/EVEV/seurat.obj.rds')
ML13 <- read_rds(file='/home/moyra.lawrence/count3/enumerate/Dppa2_MERVL_TT/seurat.obj.rds')
ML1
## An object of class Seurat
## 32294 features across 4406 samples within 1 assay
## Active assay: RNA (32294 features, 0 variable features)
ML2
## An object of class Seurat
## 32294 features across 3977 samples within 1 assay
## Active assay: RNA (32294 features, 0 variable features)
ML3
## An object of class Seurat
## 32294 features across 4835 samples within 1 assay
## Active assay: RNA (32294 features, 0 variable features)
ML4
## An object of class Seurat
## 32294 features across 3501 samples within 1 assay
## Active assay: RNA (32294 features, 0 variable features)
ML5
## An object of class Seurat
## 32294 features across 4921 samples within 1 assay
## Active assay: RNA (32294 features, 0 variable features)
ML6
## An object of class Seurat
## 32294 features across 4179 samples within 1 assay
## Active assay: RNA (32294 features, 0 variable features)
ML7
## An object of class Seurat
## 32294 features across 5105 samples within 1 assay
## Active assay: RNA (32294 features, 0 variable features)
ML8
## An object of class Seurat
## 32294 features across 3117 samples within 1 assay
## Active assay: RNA (32294 features, 0 variable features)
ML9
## An object of class Seurat
## 32294 features across 4678 samples within 1 assay
## Active assay: RNA (32294 features, 0 variable features)
ML10
## An object of class Seurat
## 32294 features across 3210 samples within 1 assay
## Active assay: RNA (32294 features, 0 variable features)
ML12
## An object of class Seurat
## 32294 features across 3438 samples within 1 assay
## Active assay: RNA (32294 features, 0 variable features)
ML13
## An object of class Seurat
## 32294 features across 3958 samples within 1 assay
## Active assay: RNA (32294 features, 0 variable features)
ML12[["percent.mt"]] <- PercentageFeatureSet(ML12, pattern = "^mt-")
VlnPlot(ML12, pt.size=0, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
VlnPlot(ML12, pt.size=0, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, log=TRUE)
FeatureScatter(ML12, feature1 = "nCount_RNA", feature2 = "percent.mt")
FeatureScatter(ML12, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
ML13[["percent.mt"]] <- PercentageFeatureSet(ML13, pattern = "^mt-")
VlnPlot(ML13, pt.size=0, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
VlnPlot(ML13, pt.size=0, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, log=TRUE)
FeatureScatter(ML13, feature1 = "nCount_RNA", feature2 = "percent.mt")
FeatureScatter(ML13, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
##Filtering
ML1[["percent.mt"]] <- PercentageFeatureSet(ML1, pattern = "^mt-")
ML2[["percent.mt"]] <- PercentageFeatureSet(ML2, pattern = "^mt-")
ML3[["percent.mt"]] <- PercentageFeatureSet(ML3, pattern = "^mt-")
ML4[["percent.mt"]] <- PercentageFeatureSet(ML4, pattern = "^mt-")
ML5[["percent.mt"]] <- PercentageFeatureSet(ML5, pattern = "^mt-")
ML6[["percent.mt"]] <- PercentageFeatureSet(ML6, pattern = "^mt-")
ML7[["percent.mt"]] <- PercentageFeatureSet(ML7, pattern = "^mt-")
ML8[["percent.mt"]] <- PercentageFeatureSet(ML8, pattern = "^mt-")
ML9[["percent.mt"]] <- PercentageFeatureSet(ML9, pattern = "^mt-")
ML10[["percent.mt"]] <- PercentageFeatureSet(ML10, pattern = "^mt-")
3500 < nFeature_RNA
10,000 < nCount_RNA <100,000
0 =< percent.mt < 10
2000 < nFeature_RNA
10,000 < nCount_RNA <100,000
0 =< percent.mt < 12
ML1.filtered <- subset(ML1, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML2.filtered <- subset(ML2, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML3.filtered <- subset(ML3, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML4.filtered <- subset(ML4, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML5.filtered <- subset(ML5, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML6.filtered <- subset(ML6, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML7.filtered <- subset(ML7, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML8.filtered <- subset(ML8, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML9.filtered <- subset(ML9, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML10.filtered <- subset(ML10, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML12.filtered <- subset(ML12, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML13.filtered <- subset(ML13, subset = nFeature_RNA > 2000 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 12)
VlnPlot(ML12.filtered, pt.size=0, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
VlnPlot(ML12.filtered, pt.size=0, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, log=TRUE)
FeatureScatter(ML12.filtered, feature1 = "nCount_RNA", feature2 = "percent.mt")
FeatureScatter(ML12.filtered, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
VlnPlot(ML13.filtered, pt.size=0, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
VlnPlot(ML13.filtered, pt.size=0, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, log=TRUE)
FeatureScatter(ML13.filtered, feature1 = "nCount_RNA", feature2 = "percent.mt")
FeatureScatter(ML13.filtered, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
merge.filtered <- merge(ML1.filtered, y = c(ML2.filtered, ML3.filtered, ML4.filtered, ML5.filtered, ML6.filtered, ML7.filtered, ML8.filtered, ML9.filtered, ML10.filtered, ML11, ML12.filtered, ML13.filtered), add.cell.ids = c("Empty_hygro_1", "Dppa2", "Gata3", "MERVL", "Empty_puro", "Empty_hygro_2", "Dppa2_Gata3", "Dppa2_MERVL", "Gata3_MERVL", "Dppa2_Gata3_MERVL", "Embryo", "Empty_puro_Empty_hygro", "Dppa2_MERVL_TT"), project = "ML1")
merge.filtered
## An object of class Seurat
## 36332 features across 39324 samples within 1 assay
## Active assay: RNA (36332 features, 0 variable features)
head(colnames(merge.filtered))
## [1] "Empty_hygro_1_AAACCCAAGGAAGTAG.1" "Empty_hygro_1_AAACCCAGTCGCACGT.1"
## [3] "Empty_hygro_1_AAACCCAGTGTGCTTA.1" "Empty_hygro_1_AAACCCATCTGGTCAA.1"
## [5] "Empty_hygro_1_AAACGAAAGTAACCGG.1" "Empty_hygro_1_AAACGAAGTCCTGGTG.1"
tail(colnames(merge.filtered))
## [1] "Dppa2_MERVL_TT_TTTGGAGCATCCCACT.1" "Dppa2_MERVL_TT_TTTGGAGGTCTGTCAA.1"
## [3] "Dppa2_MERVL_TT_TTTGGTTCACGTAGTT.1" "Dppa2_MERVL_TT_TTTGGTTGTTTGTTCT.1"
## [5] "Dppa2_MERVL_TT_TTTGTTGCAGCTGCCA.1" "Dppa2_MERVL_TT_TTTGTTGTCACTCTTA.1"
unique(sapply(X = strsplit(colnames(merge.filtered), split = "_"), FUN = "[", 1))
## [1] "Empty" "Dppa2" "Gata3" "MERVL" "Embryo"
table(merge.filtered$orig.ident)
##
## Dppa2 Dppa2_Gata3 Dppa2_Gata3_MERVL Dppa2_MERVL
## 3153 4315 2665 2090
## Dppa2_MERVL_TT Empty_hygro Empty_puro Empty_vector3
## 2072 3565 4144 3664
## EVEV Gata3 Gata3_MERVL MERVL_VP64
## 2448 3956 3888 3047
## ML11
## 317
table(merge.filtered$in.vivo)
##
## 16cell 4cell 8cell BXC C57twocell early2cell earlyblast
## 58 14 47 13 8 8 43
## fibroblast late2cell lateblast mid2cell midblast zygote
## 10 10 30 12 60 4
vp1 <- VlnPlot(merge.filtered, pt.size=0, features = c("nFeature_RNA")) + theme(legend.position = 'none')
vp2 <- VlnPlot(merge.filtered, pt.size=0, features = c("nCount_RNA")) + theme(legend.position = 'none')
vp3 <- VlnPlot(merge.filtered, pt.size=0, features = c("percent.mt")) + theme(legend.position = 'none')
vp1.log <- VlnPlot(merge.filtered, pt.size=0, features = c("nFeature_RNA"),log=TRUE)+ theme(legend.position = 'none')
vp2.log <- VlnPlot(merge.filtered, pt.size=0, features = c("nCount_RNA"),log=TRUE)+ theme(legend.position = 'none')
vp3.log <- VlnPlot(merge.filtered, pt.size=0, features = c("percent.mt"),log=TRUE) + theme(legend.position = 'none')
grid.arrange(vp1,vp2,vp3, nrow=1)
grid.arrange(vp1.log,vp2.log,vp3.log, nrow=1)
FeatureScatter(merge.filtered, "nCount_RNA", "percent.mt", pt.size=0.5) + scale_y_continuous(limits=c(0,100)) + scale_x_continuous(limits=c(0,100000))
FeatureScatter(merge.filtered, "nCount_RNA", "nFeature_RNA", pt.size=0.5) + scale_y_continuous(limits=c(0,12000)) + scale_x_continuous(limits=c(0,100000))
merge.list <- SplitObject(merge.filtered, split.by = "orig.ident")
merge.list <- lapply(X = merge.list, FUN = function(x) {
x <- NormalizeData(x)
x <- FindVariableFeatures(x, verbose = FALSE)
})
features <- SelectIntegrationFeatures(object.list = merge.list)
merge.list <- lapply(X = merge.list, FUN = function(x) {
x <- ScaleData(x, features = features, verbose = FALSE)
x <- RunPCA(x, features = features, verbose = FALSE)
})
anchors <- FindIntegrationAnchors(object.list = merge.list, reference = c(1, 5, 6, 11, 12), reduction = "rpca",
dims = 1:50 )
totipotency <- IntegrateData(anchorset = anchors, dims = 1:50)
#Save object
DefaultAssay(totipotency) <- "integrated"
save(totipotency, file="Run3.rda")
#load(file='Run3.rda')
totipotency <- FindVariableFeatures(totipotency, selection.method = "vst", nfeatures = 3000)
top10 <- head(VariableFeatures(totipotency), 10)
plot1 <- VariableFeaturePlot(totipotency)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE, xnudge=0, ynudge=0)
plot2
### Scaling
all.genes <- rownames(totipotency)
totipotency <- ScaleData(totipotency, features = all.genes)
#Omitted Normalization as this is an integrated object
totipotency <- RunPCA(totipotency, npcs=100, features = rownames(totipotency))
print(totipotency[["pca"]], dims = 1:6, nfeatures = 5)
VizDimLoadings(totipotency, dims = 1:6, reduction = "pca")
DimPlot(totipotency, reduction = "pca")
DimHeatmap(totipotency, dims = 1:6, cells = 500, balanced = TRUE)
ElbowPlot(totipotency, ndims=100)
dim(totipotency)
#0.4-1.2 for resolution for 3000 cells, according to seurat tutorial
totipotency <- FindNeighbors(totipotency, dims = 1:50)
totipotency <- FindClusters(totipotency, resolution = 0.4)
totipotency <- RunUMAP(totipotency, dims = 1:50)
p1 <- DimPlot(totipotency, reduction = "umap", group.by = "orig.ident")
p1
p2 <- DimPlot(totipotency, reduction = "umap", label = TRUE, repel = TRUE)
p2
DefaultAssay(totipotency) <- "RNA"
totipotency <- NormalizeData(totipotency)
totipotency <- FindVariableFeatures(totipotency, selection.method = "vst")
g2m_1.genes <- c("Hmgb2","Cdk1","Nusap1","Ube2c","Birc5","Tpx2","Top2a","Ndc80","Cks2","Nuf2","Cks1b", "Mki67", "Tmpo","Cenpf","Tacc3","Fam64a","Smc4","Ccnb2","Ckap2l","Ckap2","Aurkb","Bub1","Kif11","Anp32e","Tubb4b","Gtse1","Kif20b","Hjurp","Cdca3","Hn1","Cdc20","Ttk","Cdc25c","Kif2c","Rangap1","Ncapd2","Dlgap5","Cdca2","Cdca8","Ect2","Kif23","Hmmr","Aurka","Psrc1","Anln","Lbr","Ckap5","Cenpe","Ctcf","Nek2","G2e3","Gas2l3","Cbx5","Cenpa")
s_1.genes <- c("Mcm5","Pcna","Tyms","Fen1","Mcm2","Mcm4","Rrm1","Ung","Gins2","Mcm6","Cdca7","Dtl","Prim1","Uhrf1","Mlf1ip","Hells","Rfc2","Rpa2","Nasp","Rad51ap1","Gmnn","Wdr76","Slbp","Ccne2","Ubr7","Pold3","Msh2","Atad2","Rad51","Rrm2","Cdc45","Cdc6","Exo1","Tipin","Dscc1","Blm","Casp8ap2","Usp1","Clspn","Pola1","Chaf1b","Brip1","E2f8")
totipotency <- CellCycleScoring(totipotency, s.features = s_1.genes, g2m.features = g2m_1.genes, set.ident = TRUE)
# view cell cycle scores and phase assignments
head(totipotency[[]])
## orig.ident nCount_RNA nFeature_RNA
## Empty_hygro_1_AAACCCAAGGAAGTAG.1 Empty_vector3 20185 4192
## Empty_hygro_1_AAACCCAGTCGCACGT.1 Empty_vector3 26369 5163
## Empty_hygro_1_AAACCCAGTGTGCTTA.1 Empty_vector3 54120 7692
## Empty_hygro_1_AAACCCATCTGGTCAA.1 Empty_vector3 28287 5744
## Empty_hygro_1_AAACGAAAGTAACCGG.1 Empty_vector3 30029 5261
## Empty_hygro_1_AAACGAAGTCCTGGTG.1 Empty_vector3 19055 4861
## percent.mt in.vivo integrated_snn_res.0.4
## Empty_hygro_1_AAACCCAAGGAAGTAG.1 4.230865 <NA> 1
## Empty_hygro_1_AAACCCAGTCGCACGT.1 3.985741 <NA> 1
## Empty_hygro_1_AAACCCAGTGTGCTTA.1 4.477088 <NA> 0
## Empty_hygro_1_AAACCCATCTGGTCAA.1 4.008909 <NA> 0
## Empty_hygro_1_AAACGAAAGTAACCGG.1 4.878617 <NA> 1
## Empty_hygro_1_AAACGAAGTCCTGGTG.1 4.733666 <NA> 2
## seurat_clusters S.Score G2M.Score Phase
## Empty_hygro_1_AAACCCAAGGAAGTAG.1 1 -0.05283798 -0.310921404 G1
## Empty_hygro_1_AAACCCAGTCGCACGT.1 1 0.14582351 -0.317864665 S
## Empty_hygro_1_AAACCCAGTGTGCTTA.1 0 0.09708147 -0.212689429 S
## Empty_hygro_1_AAACCCATCTGGTCAA.1 0 -0.02977374 -0.098052381 G1
## Empty_hygro_1_AAACGAAAGTAACCGG.1 1 -0.18860703 -0.022766625 G1
## Empty_hygro_1_AAACGAAGTCCTGGTG.1 2 -0.16530321 -0.001745498 G1
## old.ident
## Empty_hygro_1_AAACCCAAGGAAGTAG.1 1
## Empty_hygro_1_AAACCCAGTCGCACGT.1 1
## Empty_hygro_1_AAACCCAGTGTGCTTA.1 0
## Empty_hygro_1_AAACCCATCTGGTCAA.1 0
## Empty_hygro_1_AAACGAAAGTAACCGG.1 1
## Empty_hygro_1_AAACGAAGTCCTGGTG.1 2
DimPlot(totipotency, reduction = "umap")
DefaultAssay(totipotency) <- "integrated"
totipotency <- ScaleData(totipotency, vars.to.regress=c("S.Score","G2M.Score"))
## Regressing out S.Score, G2M.Score
## Centering and scaling data matrix
save(totipotency, file="Run3_cell_cycle_corrected3.rda")
#load(file='Run3_cell_cycle_corrected3.rda')
totipotency <- RunPCA(totipotency, npcs = 100, verbose = FALSE)
totipotency <- RunUMAP(totipotency, reduction = "pca", dims = 1:30)
## 16:07:16 UMAP embedding parameters a = 0.9922 b = 1.112
## 16:07:16 Read 39324 rows and found 30 numeric columns
## 16:07:16 Using Annoy for neighbor search, n_neighbors = 30
## 16:07:16 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 16:07:23 Writing NN index file to temp file /tmp/RtmpqsbJ4K/fileab3a26e0274b
## 16:07:23 Searching Annoy index using 1 thread, search_k = 3000
## 16:07:37 Annoy recall = 100%
## 16:07:38 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 16:07:40 Initializing from normalized Laplacian + noise (using irlba)
## 16:07:41 Commencing optimization for 200 epochs, with 1756584 positive edges
## 16:08:05 Optimization finished
totipotency <- FindNeighbors(totipotency, dims = 1:50)
## Computing nearest neighbor graph
## Computing SNN
totipotency <- FindClusters(totipotency, resolution = 0.18)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 39324
## Number of edges: 1100254
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9022
## Number of communities: 12
## Elapsed time: 7 seconds
## 2 singletons identified. 10 final clusters.
DimPlot(totipotency, reduction = "umap", label = TRUE, repel = TRUE)
p1 <- DimPlot(totipotency, reduction = "umap", group.by = "orig.ident")
p1
tot <- FindVariableFeatures(totipotency, selection.method = "vst", nfeatures = 3000)
top10_2 <- head(VariableFeatures(tot), 10)
plot1 <- VariableFeaturePlot(tot)
plot2 <- LabelPoints(plot = plot1, points = top10_2, repel = TRUE, xnudge=0, ynudge=0)
plot2
print(totipotency[["pca"]], dims = 1:6, nfeatures = 5)
## PC_ 1
## Positive: Exoc4, Ranbp17, Adam23, Dst, Trip12
## Negative: Tubb4b, Cdc20, Hspa8, Ube2c, Actb
## PC_ 2
## Positive: L1td1, Klf2, Ldha, Zfp42, Enox1
## Negative: Trp53inp1, Phlda3, Btg2, Ptpn14, Sdc4
## PC_ 3
## Positive: Phlda3, Ptpn14, Krt18, Perp, Cdkn1a
## Negative: Platr31, Gm41409, Gm45659, Gm28940, Gm35974
## PC_ 4
## Positive: Sfmbt2, Col4a1, Col4a2, Hspa5, Calr
## Negative: Ano3, Hist1h2ae, Ahnak2, D630023F18Rik, Trp53inp1
## PC_ 5
## Positive: Hist1h2ae, Hist1h1b, Hist1h1e, Hist1h2ap, Hist1h4d
## Negative: Tcf15, Nanog, Kdm5b, Nr5a2, Epha4
## PC_ 6
## Positive: Ptch1, Map4k4, Utrn, Nrp2, Tfap2c
## Negative: Gabarapl2, Gm2022, Gm5662, Gm2035, Gm5039
VizDimLoadings(totipotency, dims = 1:6, reduction = "pca")
DimPlot(totipotency, reduction = "pca")
DimHeatmap(totipotency, dims = 1:6, cells = 500, balanced = TRUE)
ElbowPlot(totipotency, ndims=100)
tbl_cell <- table(totipotency@active.ident, totipotency@meta.data$orig.ident)
tbl_add <- as.data.frame(matrix(paste0(tbl_cell, " (", round(sweep(tbl_cell, 2, colSums(tbl_cell), FUN="/"), 3) * 100, "%)"), ncol=ncol(tbl_cell)))
names(tbl_add) <- colnames(tbl_cell)
tbl_add$rownames <- rownames(tbl_cell)
tbl_add %>% gt(rowname_col="rownames") %>% tab_header(title = "Libraries per cluster")
| Libraries per cluster | |||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Dppa2 | Dppa2_Gata3 | Dppa2_Gata3_MERVL | Dppa2_MERVL | Dppa2_MERVL_TT | Empty_hygro | Empty_puro | Empty_vector3 | EVEV | Gata3 | Gata3_MERVL | MERVL_VP64 | ML11 | |
| 0 | 2287 (72.5%) | 2775 (64.3%) | 1747 (65.6%) | 1317 (63%) | 254 (12.3%) | 2432 (68.2%) | 2904 (70.1%) | 2632 (71.8%) | 1507 (61.6%) | 1539 (38.9%) | 2349 (60.4%) | 1998 (65.6%) | 15 (4.7%) |
| 1 | 390 (12.4%) | 702 (16.3%) | 534 (20%) | 369 (17.7%) | 112 (5.4%) | 680 (19.1%) | 662 (16%) | 503 (13.7%) | 539 (22%) | 412 (10.4%) | 603 (15.5%) | 480 (15.8%) | 3 (0.9%) |
| 2 | 360 (11.4%) | 485 (11.2%) | 265 (9.9%) | 220 (10.5%) | 66 (3.2%) | 287 (8.1%) | 383 (9.2%) | 381 (10.4%) | 196 (8%) | 345 (8.7%) | 498 (12.8%) | 318 (10.4%) | 0 (0%) |
| 3 | 47 (1.5%) | 230 (5.3%) | 43 (1.6%) | 38 (1.8%) | 47 (2.3%) | 121 (3.4%) | 135 (3.3%) | 88 (2.4%) | 129 (5.3%) | 1581 (40%) | 356 (9.2%) | 134 (4.4%) | 127 (40.1%) |
| 4 | 25 (0.8%) | 48 (1.1%) | 31 (1.2%) | 84 (4%) | 1555 (75%) | 1 (0%) | 0 (0%) | 1 (0%) | 4 (0.2%) | 39 (1%) | 39 (1%) | 13 (0.4%) | 21 (6.6%) |
| 5 | 30 (1%) | 25 (0.6%) | 27 (1%) | 12 (0.6%) | 13 (0.6%) | 15 (0.4%) | 27 (0.7%) | 43 (1.2%) | 4 (0.2%) | 38 (1%) | 18 (0.5%) | 76 (2.5%) | 0 (0%) |
| 6 | 14 (0.4%) | 47 (1.1%) | 16 (0.6%) | 23 (1.1%) | 8 (0.4%) | 18 (0.5%) | 16 (0.4%) | 12 (0.3%) | 8 (0.3%) | 0 (0%) | 19 (0.5%) | 26 (0.9%) | 0 (0%) |
| 7 | 0 (0%) | 2 (0%) | 2 (0.1%) | 27 (1.3%) | 5 (0.2%) | 11 (0.3%) | 17 (0.4%) | 3 (0.1%) | 61 (2.5%) | 2 (0.1%) | 3 (0.1%) | 0 (0%) | 2 (0.6%) |
| 8 | 0 (0%) | 1 (0%) | 0 (0%) | 0 (0%) | 12 (0.6%) | 0 (0%) | 0 (0%) | 1 (0%) | 0 (0%) | 0 (0%) | 3 (0.1%) | 2 (0.1%) | 116 (36.6%) |
| 9 | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 33 (10.4%) |
tbl_cell <- table(totipotency@active.ident, totipotency@meta.data$in.vivo)
tbl_add <- as.data.frame(matrix(paste0(tbl_cell, " (", round(sweep(tbl_cell, 2, colSums(tbl_cell), FUN="/"), 3) * 100, "%)"), ncol=ncol(tbl_cell)))
names(tbl_add) <- colnames(tbl_cell)
tbl_add$rownames <- rownames(tbl_cell)
tbl_add %>% gt(rowname_col="rownames") %>% tab_header(title = "Embryo cells per cluster")
| Embryo cells per cluster | |||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 16cell | 4cell | 8cell | BXC | C57twocell | early2cell | earlyblast | fibroblast | late2cell | lateblast | mid2cell | midblast | zygote | |
| 0 | 4 (6.9%) | 0 (0%) | 1 (2.1%) | 1 (7.7%) | 0 (0%) | 0 (0%) | 6 (14%) | 0 (0%) | 0 (0%) | 2 (6.7%) | 0 (0%) | 1 (1.7%) | 0 (0%) |
| 1 | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 3 (30%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| 2 | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| 3 | 0 (0%) | 0 (0%) | 1 (2.1%) | 0 (0%) | 0 (0%) | 0 (0%) | 33 (76.7%) | 7 (70%) | 0 (0%) | 28 (93.3%) | 0 (0%) | 58 (96.7%) | 0 (0%) |
| 4 | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 9 (90%) | 0 (0%) | 12 (100%) | 0 (0%) | 0 (0%) |
| 5 | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| 6 | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| 7 | 2 (3.4%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| 8 | 51 (87.9%) | 14 (100%) | 45 (95.7%) | 0 (0%) | 0 (0%) | 0 (0%) | 4 (9.3%) | 0 (0%) | 1 (10%) | 0 (0%) | 0 (0%) | 1 (1.7%) | 0 (0%) |
| 9 | 1 (1.7%) | 0 (0%) | 0 (0%) | 12 (92.3%) | 8 (100%) | 8 (100%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 4 (100%) |
DefaultAssay(totipotency) <- "RNA"
mus.markers <- FindAllMarkers(totipotency, only.pos = TRUE, min.pct =0.1, logfc.threshold = 0.25)
write.xlsx(mus.markers, file = 'Run3_markers.xlsx')
mus.markers %>% group_by(cluster) %>% top_n(n = 4, wt = avg_log2FC)
totipotency <- ScaleData(totipotency, features = all.genes)
VlnPlot(totipotency, features = features_vec, pt.size=0)
#Heatmap
top10 <- mus.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)
DoHeatmap(totipotency, features = top10$gene)
## Warning in DoHeatmap(totipotency, features = top10$gene): The following features
## were omitted as they were not found in the scale.data slot for the RNA assay:
## Trf, Zbed3, Ahsg, Apoa2, Apoa1, Serpina1b, Gc, LOC100502936, Serpina3k, Alb,
## Isyna1, Pdxk, Gpd1l, Gnb2l1, Gm11517, Tceb1, Alppl2, Timd2, Eif2s2, Rpl29, Sub1,
## Prdx1, Eno1, Snhg12, Gas5, Atp6v0b, 1110038B12Rik, Lamb1, Ung, Usp37, Cdc6,
## Rrm2, Slbp, Dtl
p1 <- DimPlot(totipotency, reduction = "umap", group.by = "orig.ident")
DimPlot(totipotency, reduction = "umap", label = TRUE, repel = TRUE)
p1
p1 <- DimPlot(totipotency, reduction = "umap", group.by = "in.vivo", pt.size = 1)
p1
new.cluster.ids <- c("Pluripotent", "Pluripotent mitosis", "Pluripotent apoptosis", "Primitive Endoderm", "Totipotent","Pluripotent ubiquitin", "Pluripotent", "Pluripotent splicing translation", "Trophectoderm", "Zygote, early 2cell, 16 cell, liver")
names(new.cluster.ids) <- levels(totipotency)
totipotency <- RenameIdents(totipotency, new.cluster.ids)
DimPlot(totipotency, reduction = "umap", label = TRUE, pt.size = 0.5, repel = TRUE) + NoLegend()
tbl_cell <- table(totipotency@active.ident, totipotency@meta.data$orig.ident)
tbl_add <- as.data.frame(matrix(paste0(tbl_cell, " (", round(sweep(tbl_cell, 2, colSums(tbl_cell), FUN="/"), 3) * 100, "%)"), ncol=ncol(tbl_cell)))
names(tbl_add) <- colnames(tbl_cell)
tbl_add$rownames <- rownames(tbl_cell)
tbl_add %>% gt(rowname_col="rownames") %>% tab_header(title = "Cluster_distribution")
| Cluster_distribution | |||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Dppa2 | Dppa2_Gata3 | Dppa2_Gata3_MERVL | Dppa2_MERVL | Dppa2_MERVL_TT | Empty_hygro | Empty_puro | Empty_vector3 | EVEV | Gata3 | Gata3_MERVL | MERVL_VP64 | ML11 | |
| Pluripotent | 2301 (73%) | 2822 (65.4%) | 1763 (66.2%) | 1340 (64.1%) | 262 (12.6%) | 2450 (68.7%) | 2920 (70.5%) | 2644 (72.2%) | 1515 (61.9%) | 1539 (38.9%) | 2368 (60.9%) | 2024 (66.4%) | 15 (4.7%) |
| Pluripotent mitosis | 390 (12.4%) | 702 (16.3%) | 534 (20%) | 369 (17.7%) | 112 (5.4%) | 680 (19.1%) | 662 (16%) | 503 (13.7%) | 539 (22%) | 412 (10.4%) | 603 (15.5%) | 480 (15.8%) | 3 (0.9%) |
| Pluripotent apoptosis | 360 (11.4%) | 485 (11.2%) | 265 (9.9%) | 220 (10.5%) | 66 (3.2%) | 287 (8.1%) | 383 (9.2%) | 381 (10.4%) | 196 (8%) | 345 (8.7%) | 498 (12.8%) | 318 (10.4%) | 0 (0%) |
| Primitive Endoderm | 47 (1.5%) | 230 (5.3%) | 43 (1.6%) | 38 (1.8%) | 47 (2.3%) | 121 (3.4%) | 135 (3.3%) | 88 (2.4%) | 129 (5.3%) | 1581 (40%) | 356 (9.2%) | 134 (4.4%) | 127 (40.1%) |
| Totipotent | 25 (0.8%) | 48 (1.1%) | 31 (1.2%) | 84 (4%) | 1555 (75%) | 1 (0%) | 0 (0%) | 1 (0%) | 4 (0.2%) | 39 (1%) | 39 (1%) | 13 (0.4%) | 21 (6.6%) |
| Pluripotent ubiquitin | 30 (1%) | 25 (0.6%) | 27 (1%) | 12 (0.6%) | 13 (0.6%) | 15 (0.4%) | 27 (0.7%) | 43 (1.2%) | 4 (0.2%) | 38 (1%) | 18 (0.5%) | 76 (2.5%) | 0 (0%) |
| Pluripotent splicing translation | 0 (0%) | 2 (0%) | 2 (0.1%) | 27 (1.3%) | 5 (0.2%) | 11 (0.3%) | 17 (0.4%) | 3 (0.1%) | 61 (2.5%) | 2 (0.1%) | 3 (0.1%) | 0 (0%) | 2 (0.6%) |
| Trophectoderm | 0 (0%) | 1 (0%) | 0 (0%) | 0 (0%) | 12 (0.6%) | 0 (0%) | 0 (0%) | 1 (0%) | 0 (0%) | 0 (0%) | 3 (0.1%) | 2 (0.1%) | 116 (36.6%) |
| Zygote, early 2cell, 16 cell, liver | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 33 (10.4%) |
tbl_cell <- table(totipotency@active.ident, totipotency@meta.data$in.vivo)
tbl_add <- as.data.frame(matrix(paste0(tbl_cell, " (", round(sweep(tbl_cell, 2, colSums(tbl_cell), FUN="/"), 3) * 100, "%)"), ncol=ncol(tbl_cell)))
names(tbl_add) <- colnames(tbl_cell)
tbl_add$rownames <- rownames(tbl_cell)
tbl_add %>% gt(rowname_col="rownames") %>% tab_header(title = "Cluster_distribution")
| Cluster_distribution | |||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 16cell | 4cell | 8cell | BXC | C57twocell | early2cell | earlyblast | fibroblast | late2cell | lateblast | mid2cell | midblast | zygote | |
| Pluripotent | 4 (6.9%) | 0 (0%) | 1 (2.1%) | 1 (7.7%) | 0 (0%) | 0 (0%) | 6 (14%) | 0 (0%) | 0 (0%) | 2 (6.7%) | 0 (0%) | 1 (1.7%) | 0 (0%) |
| Pluripotent mitosis | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 3 (30%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Pluripotent apoptosis | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Primitive Endoderm | 0 (0%) | 0 (0%) | 1 (2.1%) | 0 (0%) | 0 (0%) | 0 (0%) | 33 (76.7%) | 7 (70%) | 0 (0%) | 28 (93.3%) | 0 (0%) | 58 (96.7%) | 0 (0%) |
| Totipotent | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 9 (90%) | 0 (0%) | 12 (100%) | 0 (0%) | 0 (0%) |
| Pluripotent ubiquitin | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Pluripotent splicing translation | 2 (3.4%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) |
| Trophectoderm | 51 (87.9%) | 14 (100%) | 45 (95.7%) | 0 (0%) | 0 (0%) | 0 (0%) | 4 (9.3%) | 0 (0%) | 1 (10%) | 0 (0%) | 0 (0%) | 1 (1.7%) | 0 (0%) |
| Zygote, early 2cell, 16 cell, liver | 1 (1.7%) | 0 (0%) | 0 (0%) | 12 (92.3%) | 8 (100%) | 8 (100%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 4 (100%) |
DimPlot(totipotency, reduction = "umap", split.by = "orig.ident", ncol = 3, pt.size = 2)
save(totipotency, file="Run3_final.rda")
#load("Run3_final.rda")
DefaultAssay(totipotency) <- "RNA"
totipotency <- CellCycleScoring(totipotency, s.features = s_1.genes, g2m.features = g2m_1.genes, set.ident = TRUE)
DimPlot(totipotency, reduction = "umap")
#Cell cycle bar chart
totipotency@meta.data %>% ggplot(aes(x=old.ident,fill=Phase)) +
geom_bar(position = "fill",size = 3, width = .8) +
theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 11)) +
labs(x = "", y = "Cell fraction")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
#Feature plot
FeaturePlot(totipotency, features = "MERVL")
#Plotting DMTT
#Idents(object = totipotency) <- "orig.ident"
#highlight <- WhichCells(totipotency, ident = "Dppa2_MERVL_TT")
#DimPlot(totipotency, reduction = "umap", cells.highlight = highlight, pt.size = 1, cols.highlight = c("darkgreen"))
#Subsetting on totipotency
Idents(object = totipotency) <- "seurat_clusters"
tot <- subset(x = totipotency, idents = "4")
DimPlot(tot, reduction = "umap")
save(tot, file="Run3_final_tot_cluster.rda")
tot <- FindVariableFeatures(tot, selection.method = "vst", nfeatures = 3000)
top10 <- head(VariableFeatures(tot), 10)
plot1 <- VariableFeaturePlot(tot)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE, xnudge=0, ynudge=0)
plot2
all.genes <- rownames(tot)
tot <- ScaleData(tot, features = all.genes)
tot <- RunPCA(tot, npcs=100, features = rownames(tot))
print(tot[["pca"]], dims = 1:6, nfeatures = 5)
VizDimLoadings(tot, dims = 1:6, reduction = "pca")
DimPlot(tot, reduction = "pca")
DimHeatmap(tot, dims = 1:6, cells = 500, balanced = TRUE)
ElbowPlot(tot, ndims=100)
dim(tot)
#0.4-1.2 for resolution for 3000 cells, according to seurat tutorial
tot <- FindNeighbors(tot, dims = 1:50)
tot <- FindClusters(tot, resolution = 0.2)
tot <- RunUMAP(tot, dims = 1:50)
DimPlot(tot, reduction = "umap", label = TRUE, repel = TRUE)
p1 <- DimPlot(tot, reduction = "umap", group.by = "orig.ident")
p1
## Normalise and find variable features
DefaultAssay(tot) <- "RNA"
tot <- NormalizeData(tot)
tot <- FindVariableFeatures(tot, selection.method = "vst")
tbl_cell <- table(tot@active.ident, tot@meta.data$orig.ident)
tbl_add <- as.data.frame(matrix(paste0(tbl_cell, " (", round(sweep(tbl_cell, 2, colSums(tbl_cell), FUN="/"), 3) * 100, "%)"), ncol=ncol(tbl_cell)))
names(tbl_add) <- colnames(tbl_cell)
tbl_add$rownames <- rownames(tbl_cell)
tbl_add
## Dppa2 Dppa2_Gata3 Dppa2_Gata3_MERVL Dppa2_MERVL Dppa2_MERVL_TT Empty_hygro
## 1 0 (0%) 1 (2.1%) 0 (0%) 1 (1.2%) 809 (52%) 0 (0%)
## 2 1 (4%) 3 (6.2%) 0 (0%) 1 (1.2%) 746 (48%) 0 (0%)
## 3 24 (96%) 44 (91.7%) 31 (100%) 82 (97.6%) 0 (0%) 1 (100%)
## 4 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
## Empty_vector3 EVEV Gata3 Gata3_MERVL MERVL_VP64 ML11 rownames
## 1 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0
## 2 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1
## 3 1 (100%) 4 (100%) 39 (100%) 39 (100%) 13 (100%) 0 (0%) 2
## 4 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 21 (100%) 3
DefaultAssay(tot) <- "RNA"
tot.markers <- FindAllMarkers(tot, only.pos = TRUE, min.pct =0.1, logfc.threshold = 0.25)
write.xlsx(tot.markers, file = 'Tot_markers.xlsx')
tot.markers %>% group_by(cluster) %>% top_n(n = 4, wt = avg_log2FC)
#Heatmap
top10 <- tot.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)
DoHeatmap(tot, features = top10$gene) + NoLegend() + theme(axis.text.y = element_text(size = 4.5))
p1 <- DimPlot(tot, reduction = "umap", group.by = "orig.ident")
DimPlot(tot, reduction = "umap", label = TRUE, repel = TRUE)
p1
p1 <- DimPlot(tot, reduction = "umap", group.by = "in.vivo")
p1
save(tot, file="Tot_reclustered.rda")